generate_plot = True

# imports
import pandas as pd
import os
import shutil
import glob
from re import findall, compile
import datetime
import ctypes

# define working directory
try:
    workingDirectory = os.path.dirname(__file__)
except:
    workingDirectory = str(os.getcwd())

# create list all the CSVs in the folder containing 'time_' in file name
os.chdir(workingDirectory)
list_of_csvs = glob.glob('*.{}'.format("csv"))
regex = compile(r'time_'); list_of_csvs = [i for i in list_of_csvs if regex.search(i)]
print("The following CSVs will be looped through...")
print(list_of_csvs)

The following CSVs will be looped through...
['time_0xZn_1_Ch1_240401_094500.csv', 'time_0xZn_1_Ch1_240402_161000.csv', 'time_0xZn_1_Ch1_240403_091000.csv', 'time_0xZn_1_Ch1_240404_093000.csv', 'time_0xZn_1_Ch1_240405_093000.csv', 'time_0xZn_1_Ch1_240408_094500.csv', 'time_0xZn_1_Ch1_240517_131500.csv']

# define list to store width (number of columns) for each dataframe (these should always be equal)
df_widths = []

# create distances dataframe (same for every file in folder)
df_distances = pd.read_csv(list_of_csvs[0], header=None, names=["Distance","Amplitude"]).T
df_distances = df_distances[0:1]
df_distances = df_distances.reset_index()

df_distances_width = df_distances.shape[1]
df_widths.append(df_distances_width)

# loop through the files, transpose, and add single rows to the df_all_amplitures dataframe
df_all_amplitudes =  pd.DataFrame()

num_csvs_looped_though = 0

for current_csv_file in list_of_csvs:

    print("Processing: " + current_csv_file + " (" + str(num_csvs_looped_though+1) + " of " + str(len(list_of_csvs)) + ")")

    # get timestemp
    try:
        datetime_from_file_name = findall(r'_([0-9]{6}_[0-9]{5,})\.csv', current_csv_file)[0]
    except:
        ctypes.windll.user32.MessageBoxW(0, "The are CSV files (excluding raw_data.csv) that do not match the regular expression below:\n\n_([0-9]{6}_[0-9]{5,})\.csv", "Error", 1)

    year = datetime_from_file_name[0:2]
    month = datetime_from_file_name[2:4]
    day = datetime_from_file_name[4:6]
    time = findall(r'_([0-9]{5,})\.csv', current_csv_file)[0]
    hour = time[0:2]
    minute = time[2:4]
    full_timestamp = "20" + year + "-" + month + "-" + day + "T" + hour + ":" + minute + ":00+00:00"
    
    # read and transpose CSV
    df_amplitude = pd.read_csv(current_csv_file, header=None, names=["Distance","Amplitude"]).T
    df_amplitude = df_amplitude[1:2]
    df_amplitude = df_amplitude.reset_index()

    df_amplitude_width = df_amplitude.shape[1]
    df_widths.append(df_distances_width)

    # replace first (leftmost) value with timestamp
    df_amplitude['index'] = df_amplitude['index'].replace(['Amplitude'],full_timestamp)
    df_all_amplitudes = pd.concat([df_all_amplitudes, df_amplitude], ignore_index=True)
    
    # add to counter
    num_csvs_looped_though = num_csvs_looped_though + 1

Processing: time_0xZn_1_Ch1_240401_094500.csv (1 of 7)
Processing: time_0xZn_1_Ch1_240402_161000.csv (2 of 7)
Processing: time_0xZn_1_Ch1_240403_091000.csv (3 of 7)
Processing: time_0xZn_1_Ch1_240404_093000.csv (4 of 7)
Processing: time_0xZn_1_Ch1_240405_093000.csv (5 of 7)
Processing: time_0xZn_1_Ch1_240408_094500.csv (6 of 7)
Processing: time_0xZn_1_Ch1_240517_131500.csv (7 of 7)

# sort dataframe (so earliest readings are at top)
df_all_amplitudes = df_all_amplitudes.sort_values(['index'], ascending = [True])

# union distances (top row only) and all amplitudes dataframes
df_final = pd.concat([df_distances, df_all_amplitudes], ignore_index=True)
df_final['index'] = df_final['index'].replace(['Distance'],'')
df_final.head()

# check dataframe widths are equal
df_widths = list(set(df_widths))
if len(df_widths) == 1:
    print("Dataframe widths are equal, no error")
else:
    ctypes.windll.user32.MessageBoxW(0, "Dataframe widths are unequal, all CSV files should have the same number of rows.", "Error", 1)

Dataframe widths are equal, no error

# output CSV
output_file_name = "raw_data.csv"

try:
    os.remove(output_file_name)
except:
    pass

df_final.to_csv(output_file_name, index=False, header=False)

# generate and save plots
if generate_plot == True:
    import matplotlib.pyplot as plt

    def make_plot(truncate_dist):

        df_final_t = df_final.T.reset_index()
        df_final_t.columns = df_final_t.iloc[0]
        df_final_t = df_final_t[1:]
        df_final_t = df_final_t.rename(columns={df_final_t.columns[1]: 'Distance'})
        df_final_t = df_final_t.drop(['index'], axis=1)
        df_final_t = df_final_t.iloc[:, [0, 1, -1]]  # get first, second, and last columns
        
        if truncate_dist == True:
            max_index_first = pd.to_numeric(df_final_t.iloc[:, 1], errors='coerce')
            max_index_first = max_index_first.idxmax()

            max_index_last = pd.to_numeric(df_final_t.iloc[:, 2], errors='coerce')
            max_index_last = max_index_last.idxmax()

            maxs_added = max_index_first + max_index_last
            df_final_t = df_final_t.iloc[:maxs_added, :]

            
        formatted_time_first = df_final_t.columns[1]
        formatted_time_first= formatted_time_first.replace('T',' ') 
        formatted_time_first = formatted_time_first.replace(':00+00:00','') 

        formatted_time_last = df_final_t.columns[2]
        formatted_time_last= formatted_time_last.replace('T',' ') 
        formatted_time_last = formatted_time_last.replace(':00+00:00','') 

        plt.figure(figsize=(14, 8))

        # Plot first recent reading
        plt.scatter(df_final_t['Distance'], df_final_t.iloc[:, 1], label='First reading')
        plt.plot(df_final_t['Distance'], df_final_t.iloc[:, 1], linestyle='-', linewidth=0.5, color='blue', alpha=0.5)

        # Plot most recent reading
        plt.scatter(df_final_t['Distance'], df_final_t.iloc[:, 2], label='Most recent reading')
        plt.plot(df_final_t['Distance'], df_final_t.iloc[:, 2], linestyle='-', linewidth=0.5, color='red', alpha=0.5)


        plt.xlabel(df_final_t.columns[0])  # Label for x-axis (first column name)
        plt.ylabel("Amplitude")  # Label for y-axis (second column name)
        plt.ylim(bottom=0)

        chart_title = "Amplitudes at " + formatted_time_first + " (first reading) and " + formatted_time_last + " (most recent reading)"
        if truncate_dist == True:
            chart_title = chart_title + " [DISTANCE TRUNCATED]"

        plt.title(chart_title)
        plt.legend()

        image_title = "Most recent and first readings"
        if truncate_dist == True:
            image_title = image_title + " (distance truncated)"
        image_title = image_title + ".png"
        
        try:
            os.remove(image_title)
        except:
            pass
        plt.savefig(image_title, format='png', dpi=300)
        #plt.show()

    make_plot(truncate_dist=False)
    try:
        make_plot(truncate_dist=True)  
    except:
        pass

	index	0	1	2	3	4	5	6	7	8	...	1190	1191	1192	1193	1194	1195	1196	1197	1198	1199
0		0.000000	0.107207	0.214414	0.321622	0.428829	0.536036	0.643243	0.750450	0.857658	...	127.576577	127.683784	127.790991	127.898198	128.005405	128.112613	128.219820	128.327027	128.434234	128.541441
1	2024-04-01T09:45:00+00:00	0.069553	0.036079	0.016572	0.010159	0.018783	0.014729	0.012764	0.014193	0.021974	...	0.001055	0.002097	0.001326	0.001961	0.006971	0.011156	0.009833	0.008984	0.014437	0.031000
2	2024-04-02T16:10:00+00:00	0.063265	0.036869	0.016237	0.010420	0.018407	0.014863	0.012432	0.013837	0.021768	...	0.000661	0.001665	0.000946	0.002118	0.006235	0.009632	0.009815	0.008587	0.015554	0.030573
3	2024-04-03T09:10:00+00:00	0.048215	0.032925	0.016320	0.010914	0.016880	0.014081	0.012564	0.013948	0.022045	...	0.000173	0.001283	0.000691	0.001887	0.005185	0.006122	0.009726	0.010025	0.015649	0.032935
4	2024-04-04T09:30:00+00:00	0.070546	0.037625	0.017032	0.009807	0.019065	0.014699	0.012599	0.013919	0.021953	...	0.000979	0.001985	0.001038	0.001870	0.006644	0.011179	0.009658	0.008618	0.014939	0.030755